/* "Efficiency and water use: Dynamic effects of irrigation technology adoption"
by Micah Cameron-Harp and Nathan Hendricks

Code written by Micah Cameron-Harp
May 16th, 2024

This do file creates the TWFE ATT and dynamic treatment effect estimates,
	and it also displays the sub-sample information in Table 2. Subsequent 
	do files combine these estimates with those produced using the 
	Callaway & Sant'Anna estimator in R to generate figures 3, 4, and 5.
*/

/* NOTE - Global macros for directories are created in the main_text_results.do 
	file which calls this individual .do file */
	
*Create log file
log using "$dr_output_log\twfe_att_es", replace

********************************************************************************
***************** Flood to Center Pivot or LEPA ********************************
********************************************************************************
use "$dr_temp\wrg_collapsed.dta", replace
	*Check how many WR_GROUPS are missing aquifer characteristics
	xtset WR_GROUP wua_year

/* Address missing and extreme values */
		*Drop wr_groups who don't have predevelopment characteristics
		keep if !missing(sy)
		keep if !missing(predev_sat)
		keep if !missing(predev_dtw)
		keep if !missing(hyd_cond)
		*Drop wr_groups if they have missing precip or soil data
		drop if missing(jan_april_mean_ppt)
		drop if missing(awc)
		*Drop wr_groups if they have missing authorized quantities or acreage
		drop if missing(wrg_authquant_IRR_umw) | missing(wrg_authquant_all_umw) | missing(wrg_auth_irr_acres)
		*Drop observations with recorded withdrawals but no irrigated acres and vice-versa
		drop if af_used_irr>0 & acres_irr==0
		drop if acres_irr>0 & af_used_irr==0
		*Create a irrigation intensity outcome variable
		bysort WR_GROUP wua_year: gen depth_applied = af_used_irr/acres_irr
		*Drop extreme values for af_used, acres_irr 
			sum af_used, d
				drop if af_used >= r(p99) & af_used < .
			sum acres_irr, d
				drop if acres_irr >= r(p99) & acres_irr < .
			sum depth_applied, d
				drop if depth_applied >= r(p99) & depth_applied < .	

/* Address technologies outside study, multiple adopters and reverters */
	*Drop wr_groups that don't have systems codes we're interested in *
		drop if type_system_2 > 0 | type_system_5 > 0 | type_system_6 > 0 | ///
			type_system_7 > 0 | type_system_8 > 0 | type_system_9 > 0
	*Generate indicator for cp or lepa
		gen type_system_3_4 = type_system_3==1 | type_system_4==1
	*Check how many groups switch into each final system type multiple times
		gen switch_flood_cplepa = l.type_system_1==1 & type_system_3_4==1
			by WR_GROUP: egen num_switch_flood_cplepa = total(switch_flood_cplepa)
	*Create variables showing reversion from one to another, and how many times they revert
		gen reversion_cplepa_flood = l.type_system_3_4==1 & type_system_1==1
			by WR_GROUP: egen num_revers_cplepa_flood = total(reversion_cplepa_flood)
	*Get rid of multiple switchers or reverters
		drop if num_switch_flood_cplepa>=2
		drop if num_revers_cplepa_flood>=1
	*Create variable with gmd number
		gen gmd = 0
		forvalues i=1(1)5 {
			replace gmd = `i' if gmd_`i'==1
		}
	*Drop groups with fractional system_codes
		foreach var of varlist type_system_1 type_system_3 type_system_4 {
			keep if `var'==1 | `var'==0
		}
	
/* Label variables */
	label var wua_year "Year"
	label var af_used "Acre-feet applied"
	label var acres_irr "Irrigated acres"
	label var depth_applied "Depth applied - acre-feet applied per acre"
	label var awc "Available water capacity"
	label var sandtotal "Sand content (%)"
	label var silttotal "Silt content (%)"
	label var slope "Slope"
	label var jan_april_mean_ppt "Preseason precipitation (mm)"
	label var may_sep_mean_ppt "Growing season precipitation (mm)"
	label var jan_april_mean_ET0_elev "Preseason evapotranspiration, using elevation from ssurgo"
	label var may_sep_mean_ET0_elev "Growing season evapotranspiration, using elevation from ssurgo"
	label var jan_april_mean_ET0_Har "Preseason evapotranspiration"
	label var may_sep_mean_ET0_Har "Growing season evapotranspiration"

***************** Creation of treatment and cohort variables *******************
distinct(WR_GROUP) if num_switch_flood_cplepa==1 /*1660 WR_GROUPs with an observed switch from flood to cp or lepa */
distinct WR_GROUP  /* still at 12,922 wrgs */

/*Check how many are switches from flood to cp versus flood straight to LEPA */
	gen switch_flood_cp = l.type_system_1==1 & type_system_3==1
		by WR_GROUP: egen num_switch_flood_cp = total(switch_flood_cp)
	gen switch_flood_lepa = l.type_system_1==1 & type_system_4==1
		by WR_GROUP: egen num_switch_flood_lepa = total(switch_flood_lepa)
	*Isolate true controls and not-yet-treated units
	bysort WR_GROUP: egen any_flood = max(type_system_1)
	bysort WR_GROUP: egen any_cp_or_lepa = max(type_system_3_4)
		*Check to see if we have any cp or lepa users who we don't observe the switch for
		tab any_cp_or_lepa if num_switch_flood_cplepa==0 
			*DROP THEM
			drop if any_cp_or_lepa==1 & num_switch_flood_cplepa==0 
			distinct WR_GROUP /*This brings us down to 2817 */
				/* But remember this switch variable is constructed based on 
				seeing a switch between consecutive years */
		*Check to see if we have lepa obervations for people who changed in 1990
		tab any_flood if any_cp_or_lepa==1  /*We don't have any */
		distinct(WR_GROUP) if any_flood==0 & any_cp_or_lepa==1
			*Drop them if they existed
			drop if any_flood==0 & any_cp_or_lepa==1
			
	*Now check to see how many remain
	distinct(WR_GROUP) /* 2817 water right groups in total */
	distinct(WR_GROUP) if num_switch_flood_cplepa==0 /*   1157 never treated water right groups in total */
		tab type_system_1 if num_switch_flood_cplepa==0 /* All of which are never-treated and only use flood */
	distinct(WR_GROUP) if num_switch_flood_cplepa==1 /* 1,660 water right groups make a switch at some point */
	
	*Create variable for first year adopting cp or lepa from flood to be the cohort year
		tab switch_flood_cplepa wua_year
		gen switch_year_flood_cplepa = switch_flood_cplepa*wua_year
		tab switch_year_flood_cplepa if switch_year_flood_cplepa!=0 
		by WR_GROUP: egen cohort_flood_cplepa = max(switch_year_flood_cplepa), missing
		tab cohort_flood_cplepa, m
		tab cohort_flood_cplepa if cohort_flood_cplepa!=0
		*Check remaining dataset
		xtdescribe 
		
	*Drop observations where they switched backed to flood from cp or lepa but we didn't observe it
		tab type_system_1 if wua_year>=cohort_flood_cplepa & cohort_flood_cplepa!=0 /*There are 79 of these observations */
			gen false_nyt = 0
			replace false_nyt = 1 if type_system_1==1 & cohort_flood_cplepa!=0 & wua_year>=cohort_flood_cplepa
			bysort WR_GROUP: egen false_contr = max(false_nyt)	
			drop if false_contr==1

	*Check how many make the switch and when
		preserve
		keep WR_GROUP cohort_flood_cplepa
		duplicates drop
		tab cohort_flood_cplepa if cohort_flood_cplepa!=0
		restore
		/* 1628 water right groups make the switch at some point */ 

***************** Table 2 - Flood to cp or LEPA column *************************
*Create indicator of treatment status, showing not-yet-treated and treated through time
	gen treat_status = 0
	replace treat_status = 1 if wua_year >= cohort_flood_cplepa & cohort_flood_cplepa!=0
	distinct(WR_GROUP) if treat_status==1 & wua_year <= 2015 
		/* 1,596 of the 1,628 WR_GROUPs who make the switch from 
			flood to cp or lepa do so between 1991 and 2015*/
	distinct(WR_GROUP) if cohort_flood_cplepa>2015 /* 32 nyt who adopt after 2015 */
	distinct(WR_GROUP) /* 2,785 total wrgs */
	distinct(WR_GROUP) if any_flood==1 & any_cp_or_lepa==0 /* 1,157 controls */
	distinct(WR_GROUP) if any_flood==1 & any_cp_or_lepa==1 /* 1,628 treated */

/* Create new variable that is the acre-feet applied in the earliest year recorded for each WRG */
	bysort WR_GROUP: egen min_year = min(wua_year)
	bysort WR_GROUP: gen init_af_used = af_used_irr if min_year==wua_year
	bysort WR_GROUP: egen init_af_used_2 = min(init_af_used)
	replace init_af_used = init_af_used_2
	drop init_af_used_2

	/* Save dataset before dropping never-treated because we will use it 
		for the bacon decomposition results in Table B.1 and Figure B.4 */
		export delimited using "$dr_temp\flood_cporlepa_prepped_bacon.csv", replace
	
	//////////////////////////////////
	///// DROP NEVER TREATED /////////
	//////////////////////////////////
	keep if num_switch_flood_cplepa==1

/* Export to perform Callaway and Sant'Anna estimation in R*/
export delimited using "$dr_temp\flood_cporlepa_prepped.csv", replace

********* Flood to CP or LEPA - TWFE estimates for Figures 3, 4, and 5 *********
*Limit to 1991-2015
keep if inrange(wua_year, 1991, 2015) 
xtset WR_GROUP wua_year
xtdescribe
	putexcel set "$dr_temp\twfe.xlsx", ///
		sheet("floodtocplepa_91_15_nyt") modify
	putexcel A1 = "dep_var"
	putexcel B1 = "effect estimated"
	putexcel C1 = "estimate"
	putexcel D1 = "se_estimate"
	putexcel E1 = "lb_estimate"
	putexcel F1 = "ub_estimate"
	putexcel G1 = "panel_mean_dep_var"
	putexcel H1 = "scaled_estimate"
	putexcel I1 = "scaled_lb"
	putexcel J1 = "scaled_ub"
	local i = 2
	foreach y in "af_used"  "acres_irr" "depth_applied" {
		putexcel A`i' = "`y'"
		putexcel B`i' = "effect_average"
		sum `y'
		local var_mean = r(mean)
			putexcel G`i' = `var_mean'
		xtreg `y' treat_status ///
			jan_april_mean_ppt may_sep_mean_ppt jan_april_mean_ET0_elev may_sep_mean_ET0_elev i.wua_year ///
			, fe vce(cluster WR_GROUP)
		putexcel C`i' = _b[treat_status]
		putexcel D`i' = _se[treat_status]
		local ++i
	}

*Now do event study
	// Create variable showing time relative to treatment for eventdd
	gen t_to = wua_year - cohort_flood_cplepa  
	replace t_to = . if cohort_flood_cplepa==0
	putexcel set "$dr_temp\twfe_event_study.xlsx", ///
		sheet("floodtocplepa_91_15_nyt") modify
	putexcel A1 = "stata_name"
	putexcel B1 = "ci_low_95"
	putexcel C1 = "estimate"
	putexcel D1 = "ci_high_95"
	putexcel E1 = "effect_estimated"
	putexcel F1 = "dep_var"
	putexcel G1 = "panel_mean_dep_var"
	local i = 0
	foreach y in "af_used"  "acres_irr" "depth_applied" {
		local starter = 2 + `i'*48
		local ender = `starter' + 24
		local end_vec = `starter' + 47
		*Label the periods being estimated (relative to treatment)
			*Pre-treatment
			local pre_start = `starter'
			local pre_stop = `ender' - 1
			local cur_eff = -1
			forvalues eff_cell=`pre_start'/`pre_stop' {
				putexcel E`eff_cell' = `cur_eff'
				local cur_eff = `cur_eff' - 1
			}
			*Post-treatment
			local post_start = `ender'
			local post_stop = `end_vec'
			local cur_eff = 0
			forvalues eff_cell=`post_start'/`post_stop' {
				putexcel E`eff_cell' = `cur_eff'
				local cur_eff = `cur_eff' + 1
			}
		*Label dependent variable
		putexcel F`starter':F`end_vec' = "`y'"
		*Add sub-sample mean of dep var
		sum `y'
		local var_mean = r(mean)
			putexcel G`starter':G`end_vec' = `var_mean'
		*perform event study and save results
			qui eventdd `y' jan_april_mean_ppt may_sep_mean_ppt ///
				jan_april_mean_ET0_elev may_sep_mean_ET0_elev i.wua_year, ///
				timevar(t_to) method(fe, cluster(WR_GROUP)) graph_op(leg(off))		
		putexcel A`starter' = matrix(e(leads))
		putexcel A`ender' = matrix(e(lags))
		local ++i
	}

********************************************************************************
***************** Center Pivot to LEPA *****************************************
********************************************************************************
use "$dr_temp\wrg_collapsed.dta", replace
	*Check how many WR_GROUPS are missing aquifer characteristics
	xtset WR_GROUP wua_year
	xtdescribe  /* There are 18,541 WR_GROUP's in this dataset only using wuadet_key records with IRR umw_code */

/*Create a irrigation intensity outcome variable */
	bysort WR_GROUP wua_year: gen depth_applied = af_used_irr/acres_irr
	
/* Address missing and extreme values */
	*Drop wr_groups who don't have predevelopment characteristics
	keep if !missing(sy)
	keep if !missing(predev_sat)
	keep if !missing(predev_dtw)
	keep if !missing(hyd_cond)
	*Drop wr_groups if they have missing precip or soil data
	drop if missing(jan_april_mean_ppt)
	drop if missing(awc)
	*Drop wr_groups if they have missing authorized quantities or acreage
	drop if missing(wrg_authquant_IRR_umw) | missing(wrg_authquant_all_umw) | missing(wrg_auth_irr_acres)
	*Drop observations with recorded withdrawals but no irrigated acres and vice-versa
	drop if af_used_irr>0 & acres_irr==0
	drop if acres_irr>0 & af_used_irr==0
	*Drop extreme values for af_used, acres_irr 
		sum af_used, d
			drop if af_used >= r(p99) & af_used < .
		sum acres_irr, d
			drop if acres_irr >= r(p99) & acres_irr < .
		sum depth_applied, d
			drop if depth_applied >= r(p99) & depth_applied < .		
	*Change total precipitation and evapotranspiration variables to inches
	foreach var of varlist total_preseason_ET0_elev total_growseason_ET0_elev ///
		total_preseason_ET0_Har total_growseason_ET0_Har total_preseason_ppt ///
		total_growseason_ppt {
			replace `var' = `var'/25.4
		}
	
/* Label variables */
	label var wua_year "Year"
	label var af_used "Acre-feet applied"
	label var acres_irr "Irrigated acres"
	label var depth_applied "Depth applied - acre-feet applied per acre"
	label var awc "Available water capacity"
	label var sandtotal "Sand content (%)"
	label var silttotal "Silt content (%)"
	label var slope "Slope"
	label var jan_april_mean_ppt "mean monthly preseason precipitation (mm)"
	label var may_sep_mean_ppt "mean monthly growing season precipitation (mm)"
	label var jan_april_mean_ET0_elev "mean daily preseason evapotranspiration, using elevation from ssurgo"
	label var may_sep_mean_ET0_elev "mean daily growing season evapotranspiration, using elevation from ssurgo"
	label var jan_april_mean_ET0_Har "mean daily preseason evapotranspiration"
	label var may_sep_mean_ET0_Har "mean daily growing season evapotranspiration"

*Remove wr_groups with other technologies or with fractional data
*for systems codes we're interested in *
	drop if type_system_2 > 0 | type_system_5 > 0 | type_system_6 > 0 ///
	| type_system_7 > 0 | type_system_8 > 0 | type_system_9 > 0
	*Drop fractional system_codes
		foreach var of varlist type_system_1 type_system_3 type_system_4 {
			keep if `var'==1 | `var'==0
		}

*Check how many groups switch into each final system type multiple times
	gen switch_flood_cp = l.type_system_1==1 & type_system_3==1
		by WR_GROUP: egen num_switch_flood_cp = total(switch_flood_cp)
		tab num_switch_flood_cp
	gen switch_flood_lepa = l.type_system_1==1 & type_system_4==1
		by WR_GROUP: egen num_switch_flood_lepa = total(switch_flood_lepa)
		tab num_switch_flood_lepa
	gen switch_cp_lepa = l.type_system_3==1 & type_system_4==1
		by WR_GROUP: egen num_switch_cp_lepa = total(switch_cp_lepa)
		tab num_switch_cp_lepa
	gen switch_flood_cplepa = l.type_system_1==1 & type_system_3==1
		/* Also include those who switch from flood straight to LEPA */
		replace switch_flood_cplepa = 1 if l.type_system_1==1 & type_system_4==1
		by WR_GROUP: egen num_switch_flood_cplepa = total(switch_flood_cplepa)
		tab num_switch_flood_cplepa

*Create variables showing reversion from one to another, and how many times they revert
	gen reversion_cp_flood = l.type_system_3==1 & type_system_1==1
		by WR_GROUP: egen num_revers_cp_flood = total(reversion_cp_flood)
		tab num_revers_cp_flood
	gen reversion_lepa_flood = l.type_system_4==1 & type_system_1==1
		by WR_GROUP: egen num_revers_lepa_flood = total(reversion_lepa_flood)
		tab num_revers_lepa_flood
	gen reversion_lepa_cp = l.type_system_4==1 & type_system_3==1
		by WR_GROUP: egen num_revers_lepa_cp = total(reversion_lepa_cp)
		tab num_revers_lepa_cp
	gen reversion_cplepa_flood = l.type_system_3==1 & type_system_1==1
		replace reversion_cplepa_flood = 1 if l.type_system_4==1 & type_system_1==1
		by WR_GROUP: egen num_revers_cplepa_flood = total(reversion_cplepa_flood)
		tab num_revers_cplepa_flood

*Drop multiple switchers or reverters
	drop if num_switch_flood_cp>=2
	drop if num_revers_cp_flood>=1
	drop if num_switch_flood_lepa>=2
	drop if num_revers_lepa_flood>=1
	drop if num_switch_cp_lepa>=2
	drop if num_revers_lepa_cp>=1
	drop if num_switch_flood_cplepa>=2
	drop if num_revers_cplepa_flood>=1
		
*Create variable with gmd number
	gen gmd = 0
	forvalues i=1(1)5 {
		replace gmd = `i' if gmd_`i'==1
	}

***************** Creation of treatment and cohort variables *******************
	*Keep only traditional center pivot and LEPA water right groups
	keep if type_system_3==1 | type_system_4==1 
	xtdescribe // *Now we have 7405 total water right groups
	distinct(WR_GROUP) if num_switch_cp_lepa==1 /*3824 who with an observable switch from cp to lepa */
	distinct(WR_GROUP) if num_switch_cp_lepa==0 /* 3,581 wr_groups who only have cp */
	xtdescribe //7405 total

	*Check sample stats and make sure treatment and control groups are accurate
	sum af_used acres_irr depth_applied
	bysort WR_GROUP: egen any_cp = max(type_system_3)
	bysort WR_GROUP: egen any_lepa = max(type_system_4)
		*Check to see if we have any lepa users who we don't observe the switch for
		tab any_lepa if num_switch_cp_lepa==0 
		*DROP THEM
		drop if any_lepa==1 & num_switch_cp_lepa==0 /* 45,812 observations deleted */
		xtdescribe //4097 wrgs
	
	*Create variable for first year of treatment to be the cohort year
	tab switch_cp_lepa wua_year
	gen switch_year = switch_cp_lepa*wua_year
	tab switch_year
	by WR_GROUP: egen cohort_cp_lepa = max(switch_year), missing
	tab cohort_cp_lepa
	*Check if some reverted from lepa to cp, but we didn't see it, and then later they switched from cp to lepa
		gen false_nyt = 0
		replace false_nyt = 1 if type_system_4==1 & wua_year<cohort_cp_lepa
		replace false_nyt = 1 if type_system_3==1 & cohort_cp_lepa>0 & wua_year>=cohort_cp_lepa
		bysort WR_GROUP: egen false_contr = max(false_nyt)	
		drop if false_contr==1
		xtdescribe //3989 wrgs
		
	*Create an indicator of treatment status
	gen treat_status = type_system_4
		*Check that it's working right 
		tab treat_status if cohort_cp_lepa>0 & wua_year>=cohort_cp_lepa
		tab type_system_3 if treat_status==1

********************* Table 2 - CP to LEPA column ******************************
	*Check final number of control and treated WR_GROUPS
	distinct(WR_GROUP) /*  3989 total WR_GROUPS remaining */
	distinct(WR_GROUP) if num_switch_cp_lepa==1 & cohort_cp_lepa > 0 /* 3716 who with an observable switch from cp to lepa */
		distinct(WR_GROUP) if cohort_cp_lepa>0 /* Just double checking this is also 3716 */
	distinct(WR_GROUP) if cohort_cp_lepa==0 /* Only 273 pure controls */
		
	/* Create new variable that is the acre-feet applied in the earliest year recorded for each WRG */
	bysort WR_GROUP: egen min_year = min(wua_year)
	bysort WR_GROUP: gen init_af_used = af_used_irr if min_year==wua_year
	bysort WR_GROUP: egen init_af_used_2 = min(init_af_used)
	replace init_af_used = init_af_used_2
	drop init_af_used_2

	/* Export for using in R */
	export delimited using "$dr_temp\cp_lepa_prepped.csv", replace

********* CP to LEPA - TWFE estimates for Figures 3, 4, and 5 ******************
	*Compare to two-way fixed effects
	xtset WR_GROUP wua_year
	xtdescribe
	putexcel set "$dr_temp\twfe.xlsx", ///
		sheet("cptolepa_91_19") modify
	putexcel A1 = "dep_var"
	putexcel B1 = "effect estimated"
	putexcel C1 = "estimate"
	putexcel D1 = "se_estimate"
	putexcel E1 = "lb_estimate"
	putexcel F1 = "ub_estimate"
	putexcel G1 = "panel_mean_dep_var"
	putexcel H1 = "scaled_estimate"
	putexcel I1 = "scaled_lb"
	putexcel J1 = "scaled_ub"
	local i = 2
	foreach y in "af_used"  "acres_irr" "depth_applied" {
		putexcel A`i' = "`y'"
		putexcel B`i' = "effect_average"
		sum `y'
		local var_mean = r(mean)
			putexcel G`i' = `var_mean'
		xtreg `y' treat_status ///
			jan_april_mean_ppt may_sep_mean_ppt jan_april_mean_ET0_elev may_sep_mean_ET0_elev i.wua_year ///
			, fe vce(cluster WR_GROUP)
		putexcel C`i' = _b[treat_status]
		putexcel D`i' = _se[treat_status]
		local ++i
	}

*Now do event study
	// Create variable showing time relative to treatment for eventdd
	gen t_to = wua_year - cohort_cp_lepa  
	replace t_to = . if cohort_cp_lepa==0
	putexcel set "$dr_temp\twfe_event_study.xlsx", ///
		sheet("cptolepa_91_19") modify
	putexcel A1 = "stata_name"
	putexcel B1 = "ci_low_95"
	putexcel C1 = "estimate"
	putexcel D1 = "ci_high_95"
	putexcel E1 = "effect_estimated"
	putexcel F1 = "dep_var"
	putexcel G1 = "panel_mean_dep_var"
	local i = 0
	foreach y in "af_used"  "acres_irr" "depth_applied" {
		local starter = 2 + `i'*56
		local ender = `starter' + 28
		local end_vec = `starter' + 55
		*Label the periods being estimated (relative to treatment)
			*Pre-treatment
			local pre_start = `starter'
			local pre_stop = `ender' - 1
			local cur_eff = -1
			forvalues eff_cell=`pre_start'/`pre_stop' {
				putexcel E`eff_cell' = `cur_eff'
				local cur_eff = `cur_eff' - 1
			}
			*Post-treatment
			local post_start = `ender'
			local post_stop = `end_vec'
			local cur_eff = 0
			forvalues eff_cell=`post_start'/`post_stop' {
				putexcel E`eff_cell' = `cur_eff'
				local cur_eff = `cur_eff' + 1
			}
		*Label dependent variable
		putexcel F`starter':F`end_vec' = "`y'"
		*Add sub-sample mean of dep var
		sum `y'
		local var_mean = r(mean)
			putexcel G`starter':G`end_vec' = `var_mean'
		*perform event study and save results
			qui eventdd `y' jan_april_mean_ppt may_sep_mean_ppt ///
				jan_april_mean_ET0_elev may_sep_mean_ET0_elev i.wua_year, ///
				timevar(t_to) method(fe, cluster(WR_GROUP)) graph_op(leg(off))		
		putexcel A`starter' = matrix(e(leads))
		putexcel A`ender' = matrix(e(lags))
		local ++i
	}

*Close log file
	log close 

